Merge pull request #398 from knu/imap_use_uid

Use "last seen UID" in ImapFolderAgent

Andrew Cantino 10 年之前
父節點
當前提交
6ffa528ab0
共有 3 個文件被更改,包括 196 次插入65 次删除
  1. 13 0
      app/models/agent.rb
  2. 149 47
      app/models/agents/imap_folder_agent.rb
  3. 34 18
      spec/models/agents/imap_folder_agent_spec.rb

+ 13 - 0
app/models/agent.rb

@@ -225,6 +225,19 @@ class Agent < ActiveRecord::Base
225 225
     # Implement me in your subclass to test for valid options.
226 226
   end
227 227
 
228
+  # Utility Methods
229
+
230
+  def boolify(option_value)
231
+    case option_value
232
+    when true, 'true'
233
+      true
234
+    when false, 'false'
235
+      false
236
+    else
237
+      nil
238
+    end
239
+  end
240
+
228 241
   # Class Methods
229 242
 
230 243
   class << self

+ 149 - 47
app/models/agents/imap_folder_agent.rb

@@ -11,7 +11,9 @@ module Agents
11 11
     description <<-MD
12 12
 
13 13
       The ImapFolderAgent checks an IMAP server in specified folders
14
-      and creates Events based on new unread mails.
14
+      and creates Events based on new mails found since the last run.
15
+      In the first visit to a foler, this agent only checks for the
16
+      initial status and does not create events.
15 17
 
16 18
       Specify an IMAP server to connect with `host`, and set `ssl` to
17 19
       true if the server supports IMAP over SSL.  Specify `port` if
@@ -65,6 +67,13 @@ module Agents
65 67
           body.  The default value is `['text/plain', 'text/enriched',
66 68
           'text/html']`.
67 69
 
70
+      - "is_unread"
71
+
72
+          Setting this to true or false means only mails that is
73
+          marked as unread or read respectively, are selected.
74
+
75
+          If this key is unspecified or set to null, it is ignored.
76
+
68 77
       - "has_attachment"
69 78
 
70 79
           Setting this to true or false means only mails that does or does
@@ -74,13 +83,16 @@ module Agents
74 83
 
75 84
       Set `mark_as_read` to true to mark found mails as read.
76 85
 
77
-      Each agent instance memorizes a list of unread mails that are
78
-      found in the last run, so even if you change a set of conditions
79
-      so that it matches mails that are missed previously, they will
80
-      not show up as new events.  Also, in order to avoid duplicated
81
-      notification it keeps a list of Message-Id's of 100 most recent
82
-      mails, so if multiple mails of the same Message-Id are found,
83
-      you will only see one event out of them.
86
+      Each agent instance memorizes the highest UID of mails that are
87
+      found in the last run for each watched folder, so even if you
88
+      change a set of conditions so that it matches mails that are
89
+      missed previously, or if you alter the flag status of already
90
+      found mails, they will not show up as new events.
91
+
92
+      Also, in order to avoid duplicated notification it keeps a list
93
+      of Message-Id's of 100 most recent mails, so if multiple mails
94
+      of the same Message-Id are found, you will only see one event
95
+      out of them.
84 96
     MD
85 97
 
86 98
     event_description <<-MD
@@ -138,9 +150,7 @@ module Agents
138 150
 
139 151
       %w[ssl mark_as_read].each { |key|
140 152
         if options[key].present?
141
-          case options[key]
142
-          when true, false
143
-          else
153
+          if boolify(options[key]).nil?
144 154
             errors.add(:base, '%s must be a boolean value' % key)
145 155
           end
146 156
         end
@@ -173,7 +183,6 @@ module Agents
173 183
       end
174 184
 
175 185
       case conditions = options['conditions']
176
-      when nil
177 186
       when Hash
178 187
         conditions.each { |key, value|
179 188
           value.present? or next
@@ -202,8 +211,8 @@ module Agents
202 211
                 errors.add(:base, 'conditions.%s contains a non-string object' % key)
203 212
               end
204 213
             }
205
-          when 'has_attachment'
206
-            case value
214
+          when 'is_unread', 'has_attachment'
215
+            case boolify(value)
207 216
             when true, false
208 217
             else
209 218
               errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
@@ -220,22 +229,8 @@ module Agents
220 229
     end
221 230
 
222 231
     def check
223
-      # 'seen' keeps a hash of { uidvalidity => uids, ... } which
224
-      # lists unread mails in watched folders.
225
-      seen = memory['seen'] || {}
226
-      new_seen = Hash.new { |hash, key|
227
-        hash[key] = []
228
-      }
229
-
230
-      # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
231
-      # most recent notified mails.
232
-      notified = memory['notified'] || []
233
-
234
-      each_unread_mail { |mail|
235
-        new_seen[mail.uidvalidity] << mail.uid
236
-
237
-        next if (uids = seen[mail.uidvalidity]) && uids.include?(mail.uid)
238
-
232
+      each_unread_mail { |mail, notified|
233
+        message_id = mail.message_id
239 234
         body_parts = mail.body_parts(mime_types)
240 235
         matched_part = nil
241 236
         matches = {}
@@ -274,14 +269,18 @@ module Agents
274 269
               }
275 270
             }
276 271
           when 'has_attachment'
277
-            value == mail.has_attachment?
272
+            boolify(value) == mail.has_attachment?
273
+          when 'is_unread'
274
+            true  # already filtered out by each_unread_mail
278 275
           else
279 276
             log 'Unknown condition key ignored: %s' % key
280 277
             true
281 278
           end
282 279
         } or next
283 280
 
284
-        unless notified.include?(mail.message_id)
281
+        if notified.include?(mail.message_id)
282
+          log 'Ignoring mail: %s (already notified)' % message_id
283
+        else
285 284
           matched_part ||= body_parts.first
286 285
 
287 286
           if matched_part
@@ -292,6 +291,8 @@ module Agents
292 291
             body = ''
293 292
           end
294 293
 
294
+          log 'Emitting an event for mail: %s' % message_id
295
+
295 296
           create_event :payload => {
296 297
             'folder' => mail.folder,
297 298
             'subject' => mail.subject,
@@ -308,43 +309,86 @@ module Agents
308 309
           notified << mail.message_id if mail.message_id
309 310
         end
310 311
 
311
-        if interpolated['mark_as_read']
312
+        if boolify(interpolated['mark_as_read'])
312 313
           log 'Marking as read'
313 314
           mail.mark_as_read
314 315
         end
315 316
       }
316
-
317
-      notified.slice!(0...-IDCACHE_SIZE) if notified.size > IDCACHE_SIZE
318
-
319
-      memory['seen'] = new_seen
320
-      memory['notified'] = notified
321
-      save!
322 317
     end
323 318
 
324 319
     def each_unread_mail
325 320
       host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
321
+      ssl = boolify(ssl)
322
+      port = (Integer(port) if port.present?)
326 323
 
327 324
       log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
328
-      Client.open(host, Integer(port), ssl) { |imap|
325
+      Client.open(host, port, ssl) { |imap|
329 326
         log "Logging in as #{username}"
330 327
         imap.login(username, interpolated[:password])
331 328
 
329
+        # 'lastseen' keeps a hash of { uidvalidity => lastseenuid, ... }
330
+        lastseen, seen = self.lastseen, self.make_seen
331
+
332
+        # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
333
+        # most recent notified mails.
334
+        notified = self.notified
335
+
332 336
         interpolated['folders'].each { |folder|
333 337
           log "Selecting the folder: %s" % folder
334 338
 
335 339
           imap.select(folder)
340
+          uidvalidity = imap.uidvalidity
341
+
342
+          lastseenuid = lastseen[uidvalidity]
336 343
 
337
-          unseen = imap.search('UNSEEN')
344
+          if lastseenuid.nil?
345
+            maxseq = imap.responses['EXISTS'].last
346
+
347
+            log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')
348
+
349
+            if maxseq > 0
350
+              seen[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID']
351
+            end
338 352
 
339
-          if unseen.empty?
340
-            log "No unread mails"
341 353
             next
342 354
           end
343 355
 
344
-          imap.fetch_mails(unseen).each { |mail|
345
-            yield mail
356
+          seen[uidvalidity] = lastseenuid
357
+          is_unread = boolify(interpolated['conditions']['is_unread'])
358
+
359
+          uids = imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS').
360
+                 each_with_object([]) { |data, ret|
361
+            uid, flags = data.attr.values_at('UID', 'FLAGS')
362
+            seen[uidvalidity] = uid
363
+            next if uid <= lastseenuid
364
+
365
+            case is_unread
366
+            when nil, !flags.include?(:Seen)
367
+              ret << uid
368
+            end
369
+          }
370
+
371
+          log pluralize(uids.size,
372
+                        case is_unread
373
+                        when true
374
+                          'new unread mail'
375
+                        when false
376
+                          'new read mail'
377
+                        else
378
+                          'new mail'
379
+                        end)
380
+
381
+          next if uids.empty?
382
+
383
+          imap.uid_fetch_mails(uids).each { |mail|
384
+            yield mail, notified
346 385
           }
347 386
         }
387
+
388
+        self.notified = notified
389
+        self.lastseen = seen
390
+
391
+        save!
348 392
       }
349 393
     ensure
350 394
       log 'Connection closed'
@@ -354,6 +398,27 @@ module Agents
354 398
       interpolated['mime_types'] || %w[text/plain text/enriched text/html]
355 399
     end
356 400
 
401
+    def lastseen
402
+      Seen.new(memory['lastseen'])
403
+    end
404
+
405
+    def lastseen= value
406
+      memory.delete('seen')  # obsolete key
407
+      memory['lastseen'] = value
408
+    end
409
+
410
+    def make_seen
411
+      Seen.new
412
+    end
413
+
414
+    def notified
415
+      Notified.new(memory['notified'])
416
+    end
417
+
418
+    def notified= value
419
+      memory['notified'] = value
420
+    end
421
+
357 422
     private
358 423
 
359 424
     def is_positive_integer?(value)
@@ -366,6 +431,10 @@ module Agents
366 431
       File.fnmatch?(pattern, value, FNM_FLAGS)
367 432
     end
368 433
 
434
+    def pluralize(count, noun)
435
+      "%d %s" % [count, noun.pluralize(count)]
436
+    end
437
+
369 438
     class Client < ::Net::IMAP
370 439
       class << self
371 440
         def open(host, port, ssl)
@@ -376,19 +445,52 @@ module Agents
376 445
         end
377 446
       end
378 447
 
448
+      attr_reader :uidvalidity
449
+
379 450
       def select(folder)
380 451
         ret = super(@folder = folder)
381 452
         @uidvalidity = responses['UIDVALIDITY'].last
382 453
         ret
383 454
       end
384 455
 
385
-      def fetch_mails(set)
386
-        fetch(set, %w[UID RFC822.HEADER]).map { |data|
456
+      def uid_fetch_mails(set)
457
+        uid_fetch(set, 'RFC822.HEADER').map { |data|
387 458
           Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
388 459
         }
389 460
       end
390 461
     end
391 462
 
463
+    class Seen < Hash
464
+      def initialize(hash = nil)
465
+        super()
466
+        if hash
467
+          # Deserialize a JSON hash which keys are strings
468
+          hash.each { |uidvalidity, uid|
469
+            self[uidvalidity.to_i] = uid
470
+          }
471
+        end
472
+      end
473
+
474
+      def []=(uidvalidity, uid)
475
+        # Update only if the new value is larger than the current value
476
+        if (curr = self[uidvalidity]).nil? || curr <= uid
477
+          super
478
+        end
479
+      end
480
+    end
481
+
482
+    class Notified < Array
483
+      def initialize(array = nil)
484
+        super()
485
+        replace(array) if array
486
+      end
487
+
488
+      def <<(value)
489
+        slice!(0...-IDCACHE_SIZE) if size > IDCACHE_SIZE
490
+        super
491
+      end
492
+    end
493
+
392 494
     class Message < SimpleDelegator
393 495
       DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]
394 496
 

+ 34 - 18
spec/models/agents/imap_folder_agent_spec.rb

@@ -24,7 +24,7 @@ describe Agents::ImapFolderAgent do
24 24
         end
25 25
 
26 26
         def uidvalidity
27
-          '100'
27
+          100
28 28
         end
29 29
 
30 30
         def has_attachment?
@@ -53,7 +53,15 @@ describe Agents::ImapFolderAgent do
53 53
       ]
54 54
 
55 55
       stub(@checker).each_unread_mail.returns { |yielder|
56
-        @mails.each(&yielder)
56
+        seen = @checker.lastseen
57
+        notified = @checker.notified
58
+        @mails.each_with_object(notified) { |mail|
59
+          yielder[mail, notified]
60
+          seen[mail.uidvalidity] = mail.uid
61
+        }
62
+        @checker.lastseen = seen
63
+        @checker.notified = notified
64
+        nil
57 65
       }
58 66
 
59 67
       @payloads = [
@@ -110,11 +118,19 @@ describe Agents::ImapFolderAgent do
110 118
       end
111 119
 
112 120
       it 'should validate the boolean fields' do
113
-        @checker.options['ssl'] = false
114
-        @checker.should be_valid
121
+        %w[ssl mark_as_read].each do |key|
122
+          @checker.options[key] = 1
123
+          @checker.should_not be_valid
115 124
 
116
-        @checker.options['ssl'] = 'true'
117
-        @checker.should_not be_valid
125
+          @checker.options[key] = false
126
+          @checker.should be_valid
127
+
128
+          @checker.options[key] = 'true'
129
+          @checker.should be_valid
130
+
131
+          @checker.options[key] = ''
132
+          @checker.should be_valid
133
+        end
118 134
       end
119 135
 
120 136
       it 'should validate regexp conditions' do
@@ -139,9 +155,9 @@ describe Agents::ImapFolderAgent do
139 155
     describe '#check' do
140 156
       it 'should check for mails and save memory' do
141 157
         lambda { @checker.check }.should change { Event.count }.by(2)
142
-        @checker.memory['notified'].sort.should == @mails.map(&:message_id).sort
143
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
144
-          (seen[mail.uidvalidity] ||= []) << mail.uid
158
+        @checker.notified.sort.should == @mails.map(&:message_id).sort
159
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
160
+          seen[mail.uidvalidity] = mail.uid
145 161
         }
146 162
 
147 163
         Event.last(2).map(&:payload) == @payloads
@@ -153,9 +169,9 @@ describe Agents::ImapFolderAgent do
153 169
         @checker.options['conditions']['to'] = 'John.Doe@*'
154 170
 
155 171
         lambda { @checker.check }.should change { Event.count }.by(1)
156
-        @checker.memory['notified'].sort.should == [@mails.first.message_id]
157
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
158
-          (seen[mail.uidvalidity] ||= []) << mail.uid
172
+        @checker.notified.sort.should == [@mails.first.message_id]
173
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
174
+          seen[mail.uidvalidity] = mail.uid
159 175
         }
160 176
 
161 177
         Event.last.payload.should == @payloads.first
@@ -170,9 +186,9 @@ describe Agents::ImapFolderAgent do
170 186
         )
171 187
 
172 188
         lambda { @checker.check }.should change { Event.count }.by(1)
173
-        @checker.memory['notified'].sort.should == [@mails.last.message_id]
174
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
175
-          (seen[mail.uidvalidity] ||= []) << mail.uid
189
+        @checker.notified.sort.should == [@mails.last.message_id]
190
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
191
+          seen[mail.uidvalidity] = mail.uid
176 192
         }
177 193
 
178 194
         Event.last.payload.should == @payloads.last.update(
@@ -208,9 +224,9 @@ describe Agents::ImapFolderAgent do
208 224
         )
209 225
 
210 226
         lambda { @checker.check }.should_not change { Event.count }
211
-        @checker.memory['notified'].sort.should == []
212
-        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
213
-          (seen[mail.uidvalidity] ||= []) << mail.uid
227
+        @checker.notified.sort.should == []
228
+        @checker.lastseen.should == @mails.each_with_object(@checker.make_seen) { |mail, seen|
229
+          seen[mail.uidvalidity] = mail.uid
214 230
         }
215 231
       end
216 232